library(dplyr)
library(tidyr)
library(ggplot2)
library(plotly)
library(magrittr)
library(ggthemes)
library(reshape2)
library(stringr)
library(readr)
library(stringi)
# Load the raw shark-attack records; the path is relative to the working
# directory.
shark <- read.csv("attacks.csv")

# Drop the link / bookkeeping columns that are not needed below.
shark <- select(
  shark,
  -c(
    pdf, href.formula, href, Case.Number.1, Case.Number.2,
    original.order, X, X.1
  )
)

# Keep only records whose Year is known.
shark <- shark %>% filter(!is.na(Year))

# Negated membership test: TRUE where an element of `x` is NOT found in `y`
# (the opposite of %in%).
`%!in%` <- function(x, y) !(x %in% y)

# Rename the remaining columns by position.
# NOTE(review): assumes exactly these 16 columns remain, in this order, after
# the select() above -- confirm against the CSV header.
colnames(shark) <- c(
  "Case Number", "Date", "Year", "Type", "Country", "Area", "Location",
  "Activity", "Name", "Sex", "Age", "Injury", "Fatal_Y_N", "Time", "Species",
  "Investigator or Source"
)
# Working copy of the data without the records whose year is recorded as 0.
temp <- shark %>% filter(Year != 0)

# Non-empty Area values as a sorted one-column tibble (column `value`).
Tempy <- tibble(value = shark$Area) %>%
  filter(value != "") %>%
  arrange(value)

# Bar chart of the number of attacks per year, restricted to 1980-2017.
# The year filter uses the element-wise `&`: `&&` is a scalar operator and
# must not be used to build a row mask.
Year_Shark_Year <- temp %>%
  count(Year) %>%                      # table with columns (Year, n)
  filter(Year %!in% c(5, 77, 500)) %>% # drop obviously invalid years
  filter(Year > 1979 & Year < 2018) %>%
  ggplot() +
  geom_bar(
    mapping = aes(reorder(Year, Year), y = n, fill = n),
    stat = "identity"
  ) +
  scale_fill_gradient("Attacchi", low = "yellow", high = "red") +
  theme(axis.text.x = element_text(angle = 90)) +
  labs(
    title = "Numero attacchi di squalo per anno",
    x = "Anno",
    y = "Numero di attacchi",
    fill = "Attacchi"
  )

# Horizontal bar chart of the number of attacks per country, keeping only
# countries with more than 58 recorded attacks.
# NOTE(review): despite its name, this object holds the per-country plot.
Shark_Year <- temp %>%
  count(Country) %>%
  filter(n > 58) %>%
  ggplot() +
  geom_bar(
    mapping = aes(reorder(Country, n), y = n, fill = n),
    stat = "identity"
  ) +
  scale_color_fivethirtyeight() +
  theme_fivethirtyeight() +
  scale_fill_gradient("Attacchi", low = "orange", high = "red") +
  labs(
    title = "Numero attacchi di squalo per paese",
    x = "Paese",
    y = "Numero di attacchi",
    fill = "Attachi"
  ) +
  coord_flip()

# The two valid values of the fatality flag.
Yes_No <- c("Y", "N")
# Count attacks per fatality flag, keeping only the "Y"/"N" groups, and add
# the overall total (`sum`) and each group's share of it (`Percentage`, stored
# as a fraction of 1).
Fatal <- temp %>%
  count(Fatal_Y_N) %>%
  filter(Fatal_Y_N %in% Yes_No) %>% # drop any other groups
  mutate(
    sum = sum(n),
    Percentage = n / sum
  )

# Pie chart of fatal vs. non-fatal attacks. Columns are referenced through
# formulas so the data frame does not have to be attach()ed to the search path.
plot_ly(
  Fatal,
  labels = ~Fatal_Y_N,
  values = ~Percentage,
  type = "pie",
  marker = list(colors = c("yellowgreen", "#DF2B0D"))
) %>%
  layout(
    title = "Fatalità degli attacchi di squalo in percentuale",
    paper_bgcolor = "#f8f4f4"
  )

# Start of the per-area analysis: keep only the area and the fatality flag.
attack <- temp %>% select(Area, Fatal_Y_N)
# Copy the fatality flag into `Fatal`, drop rows with an empty Area and sort
# by Area.
attack <- attack %>%
  mutate(Fatal = Fatal_Y_N) %>%
  filter(Area != "") %>%
  arrange(Area)

# Keep only the areas with more than 100 recorded attacks.
overall_tally <- attack %>%
  count(Area) %>%
  filter(n > 100)

# Names of those areas as a one-column tibble (column `value`).
# dput() echoes the vector to the console and returns it unchanged.
names <- tibble(value = dput(unique(overall_tally$Area)))

# Attack counts grouped by area and fatality flag.
area_attack_fatal <- attack %>%
  group_by(Area, Fatal) %>%
  count()

# Restrict to the high-count areas and to the valid "Y"/"N" flags.
area_attack_fatal1 <- area_attack_fatal %>%
  filter(Area %in% names$value) %>%
  filter(Fatal %in% Yes_No)

area_attack_fatal1
# Sample output captured alongside the original script:
## # A tibble: 22 x 3
## # Groups: Area, Fatal [22]
## Area Fatal n
## <chr> <chr> <int>
## 1 California N 240
## 2 California Y 18
## 3 Eastern Cape Province N 125
## 4 Eastern Cape Province Y 22
## 5 Florida N 911
## 6 Florida Y 45
## 7 Hawaii N 211
## 8 Hawaii Y 46
## 9 KwaZulu-Natal N 123
## 10 KwaZulu-Natal Y 43
## # ... with 12 more rows
# Horizontal dodged bar chart of attack counts per area, with one bar per
# fatality flag (areas ordered by count).
Attack_Area <- ggplot(
  data = area_attack_fatal1,
  mapping = aes(x = reorder(Area, n), y = n, fill = Fatal)
) +
  geom_col(position = "dodge") +
  scale_fill_manual(values = c("yellowgreen", "#DF2B0D")) +
  scale_color_fivethirtyeight() +
  theme_fivethirtyeight() +
  coord_flip() +
  labs(
    title = "Numero attacchi squalo per area",
    x = "Area",
    y = "Numero di attacchi",
    fill = "Fatalità "
  )

# NUMBER OF SHARK ATTACKS VS. COUNTRY VS. FATALITY
# Country and fatality flag for every attack with a valid "Y"/"N" flag and a
# non-empty country, sorted by country. `Fatal` is a copy of `Fatal_Y_N`.
attack_by_country <- temp %>%
  select(Country, Fatal_Y_N) %>%
  filter(Fatal_Y_N %in% Yes_No) %>%
  mutate(Fatal = Fatal_Y_N) %>%
  filter(Country != "") %>%
  arrange(Country)

# Total number of attacks per country, keeping countries with more than 30.
overall_tally_country <- attack_by_country %>%
  count(Country) %>%
  filter(n > 30)

# Names of those countries as a one-column tibble (column `value`).
# dput() echoes the vector to the console and returns it unchanged.
names_country <- tibble(value = dput(unique(overall_tally_country$Country)))

# Attack counts per country and fatality flag, restricted to the countries
# selected above. tally() drops the last grouping level, so the result stays
# grouped by Country.
Country_attack_by_country_fatal <- attack_by_country %>%
  group_by(Country, Fatal) %>%
  tally() %>%
  filter(Country %in% names_country$value)
# Per-country counts with the country total in `sum`. The total is computed
# per Country group instead of by pairing row i with row i + 1, so it stays
# correct when a country has only one of the two outcomes and never reads past
# the last row.
Country_attack_by_country_fatal1 <- Country_attack_by_country_fatal %>%
  select(Country, Fatal, n) %>%
  group_by(Country) %>%
  mutate(sum = sum(n))

# Outcome shares per country, stored as fractions of 1:
#   Survive - this row's share of the country total (the survival rate on the
#             Fatal == "N" rows).
#   Death   - on Fatal == "Y" rows, 1 minus the country's non-fatal share;
#             0 on all other rows. `sum - n` on a "Y" row is the country's
#             non-fatal count, because each country has at most one "N" and
#             one "Y" row.
Shark_Fatal_Country <- Country_attack_by_country_fatal1 %>%
  mutate(
    Survive = n / sum,
    Death = if_else(Fatal == "Y", 1 - (sum - n) / sum, 0)
  )

# Bar chart of the death share per country, highest first.
# The extra `text` aesthetic is not used by geom_bar itself.
# NOTE(review): presumably it feeds plotly::ggplotly() tooltips -- confirm.
# NOTE(review): labels say "Percent" but Death is a fraction between 0 and 1.
Shark_Fatal_Country_Plot_Die <- Shark_Fatal_Country %>%
  filter(Fatal == "Y") %>%
  ggplot() +
  geom_bar(
    mapping = aes(
      reorder(Country, -Death),
      y = Death,
      fill = Death,
      text = paste0("Number of Deaths: ", n)
    ),
    stat = "identity"
  ) +
  scale_fill_gradient("Chance", low = "yellow", high = "red") +
  scale_color_fivethirtyeight() +
  theme_fivethirtyeight() +
  theme(axis.text.x = element_text(angle = 90)) +
  labs(
    title = "Percent Chance of Death from Shark Attack (By Country)",
    x = "Country",
    y = "Percent Chance of Death from Shark Attack"
  )

# Copy used below to express the survival share as a percentage.
Shark_Fatal_Country_perc <- Shark_Fatal_Country
# Express the survival share as a percentage (0-100).
Shark_Fatal_Country_perc$Survive <- Shark_Fatal_Country_perc$Survive * 100

# Horizontal bar chart of the survival percentage per country.
# The extra `text` aesthetic is not used by geom_bar itself.
# NOTE(review): presumably it feeds plotly::ggplotly() tooltips -- confirm.
Shark_Fatal_Country_Plot_Survive <- Shark_Fatal_Country_perc %>%
  filter(Fatal == "N") %>%
  ggplot() +
  scale_color_fivethirtyeight() +
  theme_fivethirtyeight() +
  geom_bar(
    mapping = aes(
      reorder(Country, Survive),
      y = Survive,
      fill = Survive,
      text = paste0("Number of Survivals: ", n)
    ),
    stat = "identity"
  ) +
  scale_fill_gradient("Count", low = "red", high = "yellowgreen") +
  labs(
    title = "Percent Chance of Survival By Shark Attack (By Country)",
    x = "Country",
    y = "Survival Rate of Shark Attack (in %)"
  ) +
  coord_flip()

# Horizontal dodged bar chart of fatal vs. non-fatal attack counts per country.
Attack_Country <- Country_attack_by_country_fatal1 %>%
  ggplot() +
  geom_bar(
    mapping = aes(reorder(Country, n), y = n, fill = Fatal),
    position = "dodge",
    stat = "identity"
  ) +
  scale_fill_manual(values = c("yellowgreen", "#DF2B0D")) +
  scale_color_fivethirtyeight() +
  theme_fivethirtyeight() +
  labs(
    title = "Number of Shark Bites: Fatal and Non Fatal",
    x = "Country",
    y = "Number of Shark Bites",
    fill = "Fatal?"
  ) +
  coord_flip()

# Bar chart of the number of attacks per victim age. Keeps non-empty ages
# that occur more than 9 times and drops a hand-picked list of age values.
# NOTE(review): `n != 2568` removes one specific group by its count --
# presumably a placeholder / unknown-age bucket; confirm and filter it by
# value instead.
Attack_Age <- temp %>%
  count(Age) %>%
  filter(Age != "") %>%
  filter(n > 9) %>%
  filter(n != 2568) %>%
  filter(Age %!in% c(1, 2, 3, 4, 5, 6, 7, 8, 9, 61, 69)) %>%
  ggplot() +
  geom_bar(mapping = aes(Age, y = n, fill = n), stat = "identity") +
  scale_fill_gradient("Count", low = "yellow", high = "red") +
  scale_color_fivethirtyeight() +
  theme_fivethirtyeight() +
  labs(
    title = "Number of Shark Attacks vs. Age of Victim",
    x = "Age of Victim",
    y = "Number of Shark Attacks",
    fill = "Count"
  )

# Start of the per-activity analysis: work on a copy of the full data set.
shark2 <- shark
# Keep only the records with a valid "Y"/"N" fatality flag.
shark2 <- shark2 %>% filter(Fatal_Y_N %in% c("Y", "N"))

# The 30 most frequent non-empty activities, most frequent first.
top_30_activity <- shark2 %>%
  count(Activity) %>%
  arrange(desc(n)) %>%
  filter(Activity != "") %>%
  select(Activity, n) %>%
  slice(1:30)

# Names of those activities as a one-column tibble (column `value`).
# dput() echoes the vector to the console and returns it unchanged.
activities <- tibble(value = dput(unique(top_30_activity$Activity)))

# Attack counts per (activity, fatality flag) for the top activities,
# largest counts first.
Activity_Fatal <- shark2 %>%
  count(Activity, Fatal_Y_N) %>%
  arrange(desc(n)) %>%
  filter(Activity != "") %>%
  filter(Activity %in% activities$value)

# Dodged bar chart of fatal vs. non-fatal attack counts per activity.
Activity_Fatal_Plot <- Activity_Fatal %>%
  ggplot() +
  geom_bar(
    mapping = aes(reorder(Activity, -n), y = n, fill = Fatal_Y_N),
    position = "dodge",
    stat = "identity"
  ) +
  scale_color_fivethirtyeight() +
  theme_fivethirtyeight() +
  scale_fill_manual(values = c("yellowgreen", "#DF2B0D")) +
  theme(axis.text.x = element_text(angle = 90)) +
  theme(plot.title = element_text(face = "bold")) +
  labs(
    title = "Shark Attack Fatalities with Activity",
    x = "Activity",
    y = "Number of Shark Attacks",
    fill = "Fatal"
  )

# Starting point for the fatal-only subset built next.
Yes_Fatal <- Activity_Fatal
# Split the per-activity counts into fatal and non-fatal rows. Grouping is
# dropped first so the join and the renaming below act on plain columns.
Yes_Fatal <- Yes_Fatal %>%
  ungroup() %>%
  filter(Fatal_Y_N == "Y")
No_Fatal <- Activity_Fatal %>%
  ungroup() %>%
  filter(Fatal_Y_N == "N")

# One row per activity that has both fatal and non-fatal attacks, with the two
# counts side by side and the fatal share (a fraction of 1) in
# Percent_Fatality. Columns are picked by name (n.x = fatal, n.y = non-fatal)
# rather than renamed by position, and the share is computed vectorised so an
# empty join yields an empty table instead of failing.
Both_Fatal <- inner_join(Yes_Fatal, No_Fatal, by = "Activity") %>%
  select(
    Activity,
    Number_of_Fatalities = n.x,
    `Number_of_Non-Fatalities` = n.y
  ) %>%
  remove_missing() %>%
  mutate(
    Percent_Fatality = Number_of_Fatalities /
      (Number_of_Fatalities + `Number_of_Non-Fatalities`)
  )

# Bar chart of the fatal share per activity, highest first.
# NOTE(review): the explicit scale name "Count" should take precedence over
# labs(fill = ...), so the legend is titled "Count" -- confirm which title is
# intended.
Bar_Fatality_Percent <- Both_Fatal %>%
  ggplot() +
  geom_bar(
    mapping = aes(
      reorder(Activity, -Percent_Fatality),
      y = Percent_Fatality,
      fill = Percent_Fatality
    ),
    stat = "identity"
  ) +
  scale_fill_gradient("Count", low = "yellow", high = "red") +
  scale_color_fivethirtyeight() +
  theme_fivethirtyeight() +
  theme(
    axis.text.x = element_text(angle = 90),
    plot.title = element_text(face = "bold", size = 11)
  ) +
  labs(
    title = "Percent Fatality with Shark Attacks in Relation to Activity",
    x = "Activity",
    y = "Fatality Percent",
    fill = "Percent Fatality"
  )